
* ---------------------------------------------------------------------
* Session setup
* ---------------------------------------------------------------------
* Start from an empty session
clear all
* Maximum matrix dimension (the script later stacks one row per permutation draw into a matrix)
set matsize 4000
*set type double, permanently

* File paths: $dat is the prefix used when reading the .dta inputs,
* $texg is the prefix used when writing the permutation text files
global dat ""
global texg ""

* max <var>
*   Overwrites <var> with its maximum taken within each value of id.
*   The original column is parked under the name <var>_ while egen builds the
*   replacement, and is dropped afterwards. Only the first argument is used.
capture program drop max
program define max
	args v
	rename `v' `v'_
	bysort id: egen `v' = max(`v'_)
	drop `v'_
end

* Load the base file and keep observations flagged by getF or getBC
use "$dat/ZEMIS_start.dta", clear
keep if inlist(1, getF, getBC)

******************************************************
* create indicator for our regular estimation sample *
******************************************************
* Attach the estimation-sample file by id and year.
* NOTE(review): no keep() option is given, so unmatched observations from either
* file stay in memory after the merge — confirm this is intended.
merge 1:1 id year using "$dat/permutation_indicator.dta"
drop _merge

* Mark the merged-in share and count variables as the observed ("orig") values
foreach grp in conat colan coeth {
	rename share_`grp'_cow share_`grp'_cow_orig
	rename `grp'_cant_cow_pre `grp'_cant_cow_pre_orig
}

* check sample size
summarize year if ind==1  // 43675


* Keep only the variables used by the permutation test
keep year farrive bur_id emp industry2 id country  cant_res ethn_cd spr_cd   yearN emp_start woman ind share_c* conat* colan* coeth* 


************************
*** permutation test ***
************************
* test for the share of co-workers who are co-nationals if individuals are randomly assigned to vacancies
* randomize among all vacancies in a canton-year cell
* a company's history of co-nationals is based on what is actually observed, but the number of co-nationals in the current year is affected by the randomization
* the share of co-nationals decreases if we randomize the history as well
 
* Drop observations without a canton of residence; cant_res is part of every grouping key below
drop if mi(cant_res)

* Working copy of the firm identifier, with the code -2 set to missing.
* Declared double explicitly: "set type double" is commented out at the top of this file, so the
* default storage type may be float, and a float holds integers exactly only up to 16,777,216.
* NOTE(review): assumes bur_id is an integer identifier that can exceed that bound — confirm.
g double bur_id_temp=bur_id
replace bur_id_temp=. if bur_id==-2

* Working copy of the industry code, with the codes -2 and -1 set to missing
g industry2_temp=industry2
replace industry2_temp=. if inlist(industry2,-2,-1)

* Note: some observations have bur_id/industry2 but are not coded as employed; all of them occur before year 2000.
* Compromise: treat them as employed only if the employment start date falls in the current year.
* NOTE(review): int(emp_start/100) presumably extracts the year from a YYYYMM-coded date — confirm.
g emp_temp=emp
replace emp_temp=1 if emp==0 & year==int(emp_start/100) //  when work permit start
replace bur_id_temp=. if emp_temp==0
replace industry2_temp=. if emp_temp==0

* Flag the first (earliest-year) observation of each canton-firm-individual combination
bys cant_res bur_id_temp id  (year): g uniq_firm=_n==1 if !mi(bur_id_temp) // each individual-employer observation

* Keep one row per individual-employer match; rows with a missing firm id are dropped here too
keep if uniq_firm==1
g years_country=year-farrive

* Recode the codes -1 and 998 to missing in the three group identifiers
foreach v of varlist ethn_cd spr_cd country {
	replace `v'=. if inlist(`v',-1,998)
}

* ---------------------------------------------------------------------
* Permutation loop
*   For each group definition (conat: country, coeth: ethn_cd, colan: spr_cd),
*   draw 1000 random reassignments of individuals to firms within cells defined by
*   cant_res, year, woman, years_country and industry2, and record the mean and sd
*   of the resulting co-worker share. Results are stacked in matrix x_<group>
*   (columns: mean, sd, observed) and written to a text file.
* ---------------------------------------------------------------------
cap drop num
set seed 1234
* num records each observation's row position under the cell ordering. Every later sort uses the
* same cell keys first, so row `num' always lies inside the observation's own cell.
sort cant_res year woman years_country industry2, stable
* long keeps row numbers exact regardless of the default storage type
g long num=_n

* The mean of draw i is stored in the observation with num==i, so at least 1000 rows are needed;
* with fewer rows some draws would be lost silently
assert _N>=1000

foreach x in conat colan coeth {
	cap mat drop x_`x'
	* `t' is the variable that defines "same group" for this pass
	if "`x'"=="conat" loc t country
	if "`x'"=="coeth" loc t ethn_cd
	if "`x'"=="colan" loc t spr_cd
	cap drop permutation_`x'
	g double permutation_`x'=.

	forvalues i=1/1000 {
		* Drop leftovers from the previous draw one at a time: a single "drop a b c"
		* removes nothing when any one of the variables is absent
		foreach v in random firm_temp firm_hist cow_pre cant_cow_pre share_cow {
			cap drop `v'
		}

		* double keeps the full precision of runiform(), which makes ties in the sort key
		* practically impossible; stable makes the order deterministic even if one occurs,
		* so the result depends only on the seed set above
		g double random=runiform()
		sort cant_res year woman years_country industry2 random, stable
		* No replacement: each individual receives the firm id found in row `num' of the shuffled
		* cell. Stored as double so that large firm ids are not rounded under a float default.
		g double firm_temp=bur_id_temp[num]

		g cow_pre=.
		g cant_cow_pre=.

		qui forvalues y=2008/2017 {
			* Firm history: observed firm in every year except `y', randomized firm in year `y'
			cap drop firm_hist
			g double firm_hist=bur_id_temp
			replace firm_hist=firm_temp if year==`y'
			* Number of matches up to year `y' in the same canton and firm: same group (cow) and any group (cant_cow)
			bys cant_res firm_hist `t' (year): egen cow_`y'=total(uniq_firm) if !mi(firm_hist) & !mi(`t')  & year<=`y'
			bys cant_res firm_hist  (year): egen cant_cow_`y'=total(uniq_firm) if !mi(firm_hist) & !mi(`t') & year<=`y'
			replace cow_pre=cow_`y' if year==`y'
			replace cant_cow_pre=cant_cow_`y' if year==`y'
			drop cow_`y' cant_cow_`y'
		}

		* Exclude the individual from both the numerator and the denominator
		replace cow_pre=cow_pre-1 if !mi(cow_pre)
		g share_cow=(cow_pre)/(cant_cow_pre-1) if !mi(cow_pre) & !mi(cant_cow_pre)

		* Mean share in the estimation sample, restricted to firms with at least two matches
		sum share_cow if ind==1 & uniq_firm==1 & cant_cow_pre>=2 & !mi(cant_cow_pre)
		mat x_`x' = nullmat(x_`x') \ (r(mean), r(sd), .)
		replace permutation_`x'=r(mean) if num==`i'
		display `i'
	}

	* Observed mean goes into the first row, third column of the results matrix
	sum share_`x'_cow_orig if ind==1 & uniq_firm==1 & `x'_cant_cow_pre_orig>=2 & !mi(`x'_cant_cow_pre_orig)
	mat x_`x'[1,3]=r(mean)
	mat colnames x_`x' = mean sd observed
	* NOTE(review): mat2txt is not defined in this file — presumably the user-written SSC command; it must be installed
	mat2txt , matrix(x_`x') saving("$texg/txt_files/permutation_`x'.txt") replace
}


***********************************
**** compare with actual number ***
***********************************

* For each group definition: summarize the observed share, then summarize only those
* permutation means that lie above the observed mean (the reported N is the number of such draws)
foreach x in conat colan coeth {
	summarize share_`x'_cow_orig if ind==1 & uniq_firm==1 & `x'_cant_cow_pre_orig>=2 & !mi(`x'_cant_cow_pre_orig)
	* Hold the observed mean in a temporary scalar before the next summarize runs
	tempname observed
	scalar `observed' = r(mean)
	summarize permutation_`x' if permutation_`x' > `observed'
}
 

